from substrate import LLMClient
from sentence_transformers import SentenceTransformer, util
import json
import os
import time

# Shared client used for every substrate LLM request made from this module.
llm_client = LLMClient(use_dev_capacity=True)

# Create the cache directory. exist_ok=True avoids the check-then-create race
# that raises FileExistsError when two processes import this module at once.
os.makedirs("cache_data", exist_ok=True)

# On-disk cache mapping each input string to the embedding obtained for it
# from the text-embedding-ada model in substrate.
# Mode "x" creates the file only when it is missing, so an existing cache
# (possibly written by another process a moment ago) is never truncated.
try:
    with open("cache_data/string_emb_ada.json", "x") as file:
        json.dump({}, file)
except FileExistsError:
    pass

# Load the whole cache into memory; lookups and updates go through this dict.
with open("cache_data/string_emb_ada.json", "r") as file:
    cache_emb = json.load(file)

# Module-level retry counter for embedding requests; starts at zero.
reiteration_counter = 0

def get_azure_llm_emb(string_list: list) -> list | None:
    """Return the embeddings for ``string_list``, using the on-disk cache.

    Strings already present in ``cache_emb`` are served from the cache. The
    remaining ones are requested in a single batch from the
    ``dev-text-embedding-ada-002`` model through ``llm_client``, stored in
    ``cache_emb`` and the whole cache is written back to
    ``cache_data/string_emb_ada.json``.

    Args:
        string_list: Texts to embed. Items must be hashable (they are used
            as cache keys); duplicates are allowed.

    Returns:
        A list with one embedding per item of ``string_list``, in the same
        order, or ``None`` when the request failed 10 times in a row.
    """
    max_attempts = 10

    # Uncached texts, de-duplicated while keeping first-seen order so the
    # same string is never sent (or counted) twice in one request.
    req_string_list = list(
        dict.fromkeys(text for text in string_list if text not in cache_emb)
    )

    if not req_string_list:
        return [cache_emb[text] for text in string_list]

    # Using azure substrate LLM calls in this function
    req_string_emb = None
    for _attempt in range(max_attempts):
        # Adding time gap between successive LLM calls to avoid rate limit
        # exceeded error
        time.sleep(5)
        try:
            llm_output = llm_client.send_request(
                "dev-text-embedding-ada-002", {'input': req_string_list}
            )
            data = llm_output['data']
            # Index explicitly so a response with too few entries raises
            # (and is retried) instead of being silently truncated.
            req_string_emb = [
                data[idx]['embedding'] for idx in range(len(req_string_list))
            ]
            break
        except Exception as e:
            # Best-effort retry: report the failure and try again.
            print(e)

    if req_string_emb is None:
        print("Error getting embedding from substrate!")
        return None

    # we have a non empty emb list in this section
    for text, emb in zip(req_string_list, req_string_emb):
        cache_emb[text] = emb

    print("Items added to embedding cache = {}".format(len(req_string_list)))
    with open("cache_data/string_emb_ada.json", "w") as file:
        json.dump(cache_emb, file)

    # Always answer for the full input, whether or not a retry was needed, so
    # the result stays aligned with ``string_list``.
    return [cache_emb[text] for text in string_list]

